# [This is an unfinished DRAFT version !!!]

# Title: "Replication R code for 'Magnitude Matters: Voter Turnout Under Different Electoral Systems' [MAIN ESTIMATES]"
# author: "Michał Pierzgalski"
# date: "29.04.2025"


# Load packages

library(tidyverse)
library(ggpubr)
library(cowplot)
library(ggbreak)
library(panelView)
library(rdrobust)
library(rddtools)
library(rddensity)
library(rdpower)
library(stringr)
library(xtable)  # For creating LaTeX tables


# START; load data set !!!

load("pl_local_elec_1998_2018.rdd.data_27_04_25.RData") # !!!

# DATA PREP ----

# ADD pre-determined covariates !!!

# pop

pop_allyears.sub <- pop_allyears |> filter(m_type %in% c(1, 2, 3)) |> drop_na("pop") # No data on population for some municipalities
inc_m1_abs_sub <- inc_m1_abs |> select(year, TERYT, inc_m1_abs, m_type)
pop_covs_data <- pop_allyears.sub %>% left_join(., inc_m1_abs_sub) |> mutate(inc_m1_pc = inc_m1_abs/pop) # !!!

data.teryt.summary.rdd.1.1.1 <- data.teryt.summary.rdd.1.1 %>% left_join(., pop_covs_data) # !!!

# popw

pop_women <- pop_women %>% mutate(popw = popw_1000*1000)
data.teryt.summary.rdd.1.1.1 <- data.teryt.summary.rdd.1.1.1 %>% left_join(., pop_women) %>% mutate(popw_share = popw/pop) # !!!

# ha + agro + exp_inv + density

# data.teryt.summary.rdd.1.1.1 <- data.teryt.summary.rdd.1.1.1 %>% left_join(., areah) %>% left_join(., agro) %>% left_join(., exp_inv) %>% mutate(agro_share = agro/areah, exp_inv_share = exp_inv/pop, density_h = pop/areah) # !!!


# data_exec (BARTNICKI data set)

# data.teryt.summary.rdd.1.2 <- data.teryt.summary.rdd.1.1.1 %>% left_join(., slawek_main_data_set) # !!!

data.teryt.summary.rdd.1.2 <- data.teryt.summary.rdd.1.1.1

# Rename and select

data.teryt.summary.rdd.1.2 <- data.teryt.summary.rdd.1.2 %>% 
  rename(comm.t = type)

data.teryt.summary.rdd.1.2 <- data.teryt.summary.rdd.1.2 %>%
  rename(a.method.d = ES.d)

# frek_odd -- frekwencja w I turze w wyborach egzekutyw (2002, 2006, 2010, 2014)

# data.teryt.summary.rdd.1.2 <- data.teryt.summary.rdd.1.2 %>% rename(turnout_r1_exec = frek_odd)


# Select main vars !!!

data.teryt.summary.rdd.1.3 <- data.teryt.summary.rdd.1.2 |> select(
  year,
  TERYT,
  municipality_name,
  m_type,
  comm.t.1998,
  #  comm.t.2014,
  comm.t,
  #  commune_type,
  level.d,
  pop1998,
  pop2014,
  pop,
  popw,
  popw_share,
  elig.voter.d,
  valid.v.d,
  invalid.v.d,
  ballots.valid.d,
  v.rule.d,
  v.rule.d.2,
  a.method.d,
  DM,
  turnout,
#  turnout_r1_exec,
  num.parties.d,
  num.cand.d,
  num.uncontest.dist,
  TS.leg,
  num.districts,
  vs,
  mv,
  wt,
  mv.d,
#  a2t,
  unemp,
  comm.income,
  inc_m1_abs,
  inc_m1_pc,
#  areah,
#  agro,
#  agro_share,
#  exp_inv_share,
#  density_h
) %>% filter(ballots.valid.d != 0) # !!!


# RDD ESTIMATES for (1998) 2002, 2006, 2010 elections ----

# SMDP : (1998) 2002, 2006, 2010

data.teryt.summary.rdd.2_e20_smdp <- data.teryt.summary.rdd.1.3 %>%
  filter(
    pop > 0,
    m_type %in% c(1,2,3),
    year %in% c(1998, 2002, 2010, 2006),
    mv.d >= 0, # MV adjust
    !(v.rule.d == "Non-PR" & pop > 20000),
    !(v.rule.d == "PR" &
        pop <= 20000),
    (DM %in% c(1) &
       v.rule.d == "Non-PR") |
      (DM %in% c(5:10) & v.rule.d == "PR")
  ) %>% mutate(
    n.cand.per.seat = num.cand.d / DM,
    npart.seat.d = num.parties.d / DM,
    Treated = if_else(pop > 20000, 1, 0)
  )



# MMDP : (1998) 2002, 2006, 2010

data.teryt.summary.rdd.2_e20_mmdp <- data.teryt.summary.rdd.1.3 %>%
  filter(
    pop > 0,
    m_type %in% c(1,2,3),
    year %in% c(1998, 2002, 2010, 2006),
    mv.d >= 0,
    !(v.rule.d == "Non-PR" & pop > 20000),
    !(v.rule.d == "PR" &
        pop <= 20000),
    DM %in% c(2:10) # DM Adjust !!!
  ) %>% mutate(
    n.cand.per.seat = num.cand.d / DM,
    npart.seat.d = num.parties.d / DM,
    Treated = if_else(pop > 20000, 1, 0)
  )


# Data filter 2

# smdp

data.teryt.summary.rdd.3_e20_smdp <-
  data.teryt.summary.rdd.2_e20_smdp %>% filter(
    pop > 0 & pop < 40000,
    #    pop > 0 & pop < 19900 | pop > 20100 & pop < 40000, # DONUT hole 1%
    #    pop > 0 & pop < 19800 | pop > 20200 & pop < 40000, # DONUT hole 2%
    #    pop > 0 & pop < 19700 | pop > 20300 & pop < 40000, # DONUT hole 3%
    #    pop > 0 & pop < 19600 | pop > 20400 & pop < 40000, # DONUT hole 4%
    #    pop > 0 & pop < 19500 | pop > 20500 & pop < 40000, # DONUT hole 5%
    #    pop > 0 & pop < 19400 | pop > 20600 & pop < 40000, # DONUT hole 6%
    #    pop > 0 & pop < 19300 | pop > 20700 & pop < 40000, # DONUT hole 7%
    #    pop > 0 & pop < 19200 | pop > 20800 & pop < 40000, # DONUT hole 8%
    #  pop > 0 & pop < 19100 | pop > 20900 & pop < 40000, # DONUT hole 9%
    #  pop > 0 & pop < 19000 | pop > 21000 & pop < 40000, # DONUT hole 10%
    a.method.d %in% c("SMDP", "DHondt"), mv.d > 0) # SMDP !!!


# mmdp

data.teryt.summary.rdd.3_e20_mmdp <- data.teryt.summary.rdd.2_e20_mmdp %>%
  filter(
    pop > 0 & pop < 40000,
    #    pop > 0 & pop < 19900 | pop > 20100 & pop < 40000, # DONUT hole 1%
    #    pop > 0 & pop < 19800 | pop > 20200 & pop < 40000, # DONUT hole 2%
    #    pop > 0 & pop < 19700 | pop > 20300 & pop < 40000, # DONUT hole 3%
    #    pop > 0 & pop < 19600 | pop > 20400 & pop < 40000, # DONUT hole 4%
    #    pop > 0 & pop < 19500 | pop > 20500 & pop < 40000, # DONUT hole 5%
    #    pop > 0 & pop < 19400 | pop > 20600 & pop < 40000, # DONUT hole 6%
    #   pop > 0 & pop < 19300 | pop > 20700 & pop < 40000, # DONUT hole 7%
    #    pop > 0 & pop < 19200 | pop > 20800 & pop < 40000, # DONUT hole 8%
    #  pop > 0 & pop < 19100 | pop > 20900 & pop < 40000, # DONUT hole 9%
    #  pop > 0 & pop < 19000 | pop > 21000 & pop < 40000, # DONUT hole 10%
    a.method.d %in% c("MMDP", "DHondt"), mv.d > 0) # MMDP !!!


#

data.teryt.summary.rdd.3_e20_smdp <- data.teryt.summary.rdd.3_e20_smdp %>%
  mutate(
    comm.t.disc = case_when(m_type == 1 ~ 3,
                            m_type == 3 ~ 2,
                            m_type == 2 ~ 1)
  )

data.teryt.summary.rdd.3_e20_mmdp <- data.teryt.summary.rdd.3_e20_mmdp %>%
  mutate(
    comm.t.disc = case_when(m_type == 1 ~ 3,
                            m_type == 3 ~ 2,
                            m_type == 2 ~ 1)
  )



# COVS. !!!

covs.rdd_smdp = cbind.data.frame(
  #  comm.t.disc = data.teryt.summary.rdd.3_e20_smdp$comm.t.disc,
  year = data.teryt.summary.rdd.3_e20_smdp$year
  #  density = data.teryt.summary.rdd.3_e20_smdp$density_h,
  #  popw = data.teryt.summary.rdd.3_e20_smdp$popw_share
)

covs.rdd_mmdp = cbind.data.frame(
  #  comm.t.disc = data.teryt.summary.rdd.3_e20_mmdp$comm.t.disc,
  year = data.teryt.summary.rdd.3_e20_mmdp$year
  #  density = data.teryt.summary.rdd.3_e20_mmdp$density_h,
  #  popw = data.teryt.summary.rdd.3_e20_mmdp$popw_share
)



# RDD PLOTS !!!

rdd_out.1 <-
  rdplot(
    data.teryt.summary.rdd.3_e20_smdp$turnout,
    data.teryt.summary.rdd.3_e20_smdp$pop,
    kernel = "triangular",
    covs_eval = "mean",
    p = 4,
    c = 20000,
    x.label = "Population",
    y.label = "Turnout",
    binselect = "qs"
  )

rdd_out.1 <-
  rdplot(
    data.teryt.summary.rdd.3_e20_mmdp$turnout,
    data.teryt.summary.rdd.3_e20_mmdp$pop,
    kernel = "triangular",
    covs_eval = "mean",
    p = 4,
    c = 20000,
    x.label = "Population",
    y.label = "Turnout",
    binselect = "qs"
  )



# INFERENCE (rdrobust) ----

### Models (Table 2 and Figure 5)

# smdp

rdd_inf_out.smdp_pool_mse.cov <- rdrobust(
  data.teryt.summary.rdd.3_e20_smdp$turnout,
  data.teryt.summary.rdd.3_e20_smdp$pop,
  kernel = "triangular",
  bwselect = "msetwo",
  covs = covs.rdd_smdp, # !!!
  c = 20000,
  p = 1,
  q = 2,
  masspoints = "adjust",
  bwcheck = 10,
  all = T,
  stdvars = F,
  deriv = 0,
  cluster = data.teryt.summary.rdd.3_e20_smdp$m_type,
  #  h = c(1924, 2863), # Fixed MSE band for DONUT !!!
  #  b = c(5154, 6062) # Fixed MSE band for DONUT !!!
)

summary(rdd_inf_out.smdp_pool_mse.cov)


rdd_inf_out.smdp_pool_cer.cov <- rdrobust(
  data.teryt.summary.rdd.3_e20_smdp$turnout,
  data.teryt.summary.rdd.3_e20_smdp$pop,
  kernel = "triangular",
  bwselect = "certwo",
  covs = covs.rdd_smdp, # !!!
  c = 20000,
  p = 1,
  q = 2,
  masspoints = "adjust",
  bwcheck = 10,
  all = T,
  stdvars = F,
  deriv = 0,
  cluster = data.teryt.summary.rdd.3_e20_smdp$m_type,
  #  h = c(1759.416, 2617.248), # Fixed CER band for DONUT !!!
  #  b = c(5154.053, 6062.418) # Fixed CER band for DONUT !!!
)

summary(rdd_inf_out.smdp_pool_cer.cov)


# mmdp

rdd_inf_out.mmdp_pool_mse.cov <- rdrobust(
  data.teryt.summary.rdd.3_e20_mmdp$turnout,
  data.teryt.summary.rdd.3_e20_mmdp$pop,
  kernel = "triangular",
  bwselect = "msetwo",
  covs = covs.rdd_mmdp,
  c = 20000,
  p = 1,
  q = 2,
  masspoints = "adjust",
  bwcheck = 10,
  all = T,
  stdvars = F,
  deriv = 0,
  cluster = data.teryt.summary.rdd.3_e20_mmdp$m_type,
  #  h = c(1434.746, 2835.929), # Fixed MSE band for DONUT !!!
  #  b = c(3648.768, 5884.218) # Fixed MSE band for DONUT !!!
)

summary(rdd_inf_out.mmdp_pool_mse.cov)


rdd_inf_out.mmdp_pool_cer.cov <- rdrobust(
  data.teryt.summary.rdd.3_e20_mmdp$turnout,
  data.teryt.summary.rdd.3_e20_mmdp$pop,
  kernel = "triangular",
  bwselect = "certwo",
  covs = covs.rdd_mmdp,
  c = 20000,
  p = 1,
  q = 2,
  masspoints = "adjust",
  bwcheck = 10,
  all = T,
  stdvars = F,
  deriv = 0,
  cluster = data.teryt.summary.rdd.3_e20_mmdp$m_type,
  #  h = c(1311.800, 2592.912), # Fixed CER band for DONUT !!!
  #  b = c(3648.768, 5884.218) # Fixed CER band for DONUT !!!
)

summary(rdd_inf_out.mmdp_pool_cer.cov)


# BASE models !!!

# smdp

rdd_inf_out.smdp_pool_mse.cov_base <- rdrobust(
  data.teryt.summary.rdd.3_e20_smdp$turnout,
  data.teryt.summary.rdd.3_e20_smdp$pop,
  kernel = "triangular",
  bwselect = "msetwo",
  covs = covs.rdd_smdp, # !!!
  c = 20000,
  p = 1,
  q = 2,
  masspoints = "adjust",
  bwcheck = 10,
  all = T,
  stdvars = F,
  deriv = 0,
  #  cluster = data.teryt.summary.rdd.3_e20_smdp$m_type,
  #  h = c(1924, 2863), # Fixed MSE band for DONUT !!!
  #  b = c(5154, 6062) # Fixed MSE band for DONUT !!!
)

summary(rdd_inf_out.smdp_pool_mse.cov_base)


rdd_inf_out.smdp_pool_cer.cov_base <- rdrobust(
  data.teryt.summary.rdd.3_e20_smdp$turnout,
  data.teryt.summary.rdd.3_e20_smdp$pop,
  kernel = "triangular",
  bwselect = "certwo",
  covs = covs.rdd_smdp, # !!!
  c = 20000,
  p = 1,
  q = 2,
  masspoints = "adjust",
  bwcheck = 10,
  all = T,
  stdvars = F,
  deriv = 0,
  #  cluster = data.teryt.summary.rdd.3_e20_smdp$m_type,
  #  h = c(1759.416, 2617.248), # Fixed CER band for DONUT !!!
  #  b = c(5154.053, 6062.418) # Fixed CER band for DONUT !!!
)

summary(rdd_inf_out.smdp_pool_cer.cov_base)


# mmdp

rdd_inf_out.mmdp_pool_mse.cov_base <- rdrobust(
  data.teryt.summary.rdd.3_e20_mmdp$turnout,
  data.teryt.summary.rdd.3_e20_mmdp$pop,
  kernel = "triangular",
  bwselect = "msetwo",
  covs = covs.rdd_mmdp,
  c = 20000,
  p = 1,
  q = 2,
  masspoints = "adjust",
  bwcheck = 10,
  all = T,
  stdvars = F,
  deriv = 0,
  #  cluster = data.teryt.summary.rdd.3_e20_mmdp$m_type,
  #  h = c(1434.746, 2835.929), # Fixed MSE band for DONUT !!!
  #  b = c(3648.768, 5884.218) # Fixed MSE band for DONUT !!!
)

summary(rdd_inf_out.mmdp_pool_mse.cov_base)


rdd_inf_out.mmdp_pool_cer.cov_base <- rdrobust(
  data.teryt.summary.rdd.3_e20_mmdp$turnout,
  data.teryt.summary.rdd.3_e20_mmdp$pop,
  kernel = "triangular",
  bwselect = "certwo",
  covs = covs.rdd_mmdp,
  c = 20000,
  p = 1,
  q = 2,
  masspoints = "adjust",
  bwcheck = 10,
  all = T,
  stdvars = F,
  deriv = 0,
  #  cluster = data.teryt.summary.rdd.3_e20_mmdp$m_type,
  #  h = c(1311.800, 2592.912), # Fixed CER band for DONUT !!!
  #  b = c(3648.768, 5884.218) # Fixed CER band for DONUT !!!
)

summary(rdd_inf_out.mmdp_pool_cer.cov_base)



## PLOT MAIN ESTIMATES ----

# Function to extract robust coefficient estimates, CIs, p-values, and effective observations

extract_robust_estimates <- function(rd_result, model_name) {
  robust_row <- which(rownames(rd_result$coef) == "Robust")
  
  estimate <- rd_result$coef[robust_row, 1]
  ci_lower <- rd_result$ci[robust_row, 1]
  ci_upper <- rd_result$ci[robust_row, 2]
  p_value <- rd_result$pv[robust_row, 1]
  
  n_left <- rd_result$N_h[1]  # Effective observations left of cutoff
  n_right <- rd_result$N_h[2]  # Effective observations right of cutoff
  
  data.frame(
    model = model_name,
    estimate = estimate,
    ci_lower = ci_lower,
    ci_upper = ci_upper,
    p_value = p_value,
    n_left = n_left,
    n_right = n_right
  )
}


# POOLED dist. 1998-2010 !!!

estimates <- bind_rows(
  extract_robust_estimates(rdd_inf_out.smdp_pool_mse.cov, "M1. SMDP-PR"),
  extract_robust_estimates(rdd_inf_out.smdp_pool_cer.cov, "M2. SMDP-PR [CER]"),
  extract_robust_estimates(rdd_inf_out.mmdp_pool_mse.cov, "M3. MMDP-PR"),
  extract_robust_estimates(rdd_inf_out.mmdp_pool_cer.cov, "M4. MMDP-PR [CER]"),
  #  extract_robust_estimates(rdd_inf_out.mmdp_pool_mse.cov.DM5, "M5. MMDP-PR [DM=5]"),
  #  extract_robust_estimates(rdd_inf_out.mmdp_pool_cer.cov.DM5, "M6. MMDP-PR [DM=5, CER]")
)

# OR

# POOLED [ALL] !!!

estimates <- bind_rows(
  extract_robust_estimates(rdd_inf_out.smdp_pool_mse.cov, "M1. SMDP-PR [clustervar]"),
  extract_robust_estimates(rdd_inf_out.smdp_pool_cer.cov, "M2. SMDP-PR [CER, clustervar]"),
  extract_robust_estimates(rdd_inf_out.mmdp_pool_mse.cov, "M3. MMDP-PR [clustervar]"),
  extract_robust_estimates(rdd_inf_out.mmdp_pool_cer.cov, "M4. MMDP-PR [CER, clustervar]"),
  extract_robust_estimates(rdd_inf_out.smdp_pool_mse.cov_base, "M5. SMDP-PR"),
  extract_robust_estimates(rdd_inf_out.smdp_pool_cer.cov_base, "M6. SMDP-PR [CER]"),
  extract_robust_estimates(rdd_inf_out.mmdp_pool_mse.cov_base, "M7. MMDP-PR"),
  extract_robust_estimates(rdd_inf_out.mmdp_pool_cer.cov_base, "M8. MMDP-PR [CER]"),
)



# Create a function to format and wrap the legend text
format_legend <- function(model, n_left, n_right) {
  text <- sprintf("%s\nN Left: %d\nN Right: %d", model, n_left, n_right)
  str_wrap(text, width = 16)  # Adjust width as needed
}

# Generate labels for each model
labels <- mapply(format_legend, 
                 estimates$model, 
                 estimates$n_left, 
                 estimates$n_right, 
                 SIMPLIFY = FALSE)

# PLOT def.

plot_dturn_POOL <- ggplot(estimates, aes(x = model, y = estimate)) +
  geom_point(size = 3, color = "purple") +
  geom_errorbar(aes(ymin = ci_lower, ymax = ci_upper), width = 0.1, linewidth = 0.7, color = "purple") +
  geom_hline(yintercept = 0, linetype = "dashed", color = "black") +
  labs(
    title = "RD Robust Coefficient Estimates",
    subtitle = "Point Estimates with 95% CI",
    x = "Model",
    y = "Robust Coefficient Estimate\n(Treatment Effect on Turnout, 1998-2010)",
    color = "Model"
  ) +
  theme_cowplot() +
  scale_y_continuous(labels = scales::number_format(accuracy = 0.01), limits = c(-0.02, 0.05), n.breaks = 8) + theme(panel.grid.minor = element_line(color = "#69b3a2", linewidth = 0.5, linetype = 3), axis.text.x = element_text(angle = 90))

# Display the plot
print(plot_dturn_POOL)

